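# Install dependencies: pip install llama-index-embeddings-openai
# NOTE(review): this script imports a local BGE embedding factory from
# config.embeddings rather than an OpenAI embedding class -- confirm that
# the package named above is the one actually required.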
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import (
    SentenceSplitter,
    SemanticSplitterNodeParser,
)

from config.embeddings import embed_model_local_bge_small

# Load the single text file listed in input_files into Document objects.
_reader = SimpleDirectoryReader(
    input_files=['../data/airline-service.txt'],
)
documents = _reader.load_data()

# Embedding model instance, produced by the project-local factory.
embed_model = embed_model_local_bge_small()

# Fixed-size splitter configured with chunk_size=512 and chunk_overlap=100.
# NOTE(review): this object is not referenced again in the visible script;
# only the semantic splitter below is used to produce nodes.
sentence_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=100)

# Semantic splitter: uses the embedding model to decide where to break the
# text. See the SemanticSplitterNodeParser API docs for the exact meaning of
# buffer_size and breakpoint_percentile_threshold.
splitter = SemanticSplitterNodeParser(
    embed_model=embed_model,
    buffer_size=1,
    breakpoint_percentile_threshold=95,
)

# Split the loaded documents into nodes and dump the resulting list.
nodes = splitter.get_nodes_from_documents(documents)
print(nodes)
